knitr::opts_chunk$set(echo = TRUE)
We genrated a list of summary statistics for the mRNA reads.
nohup zsh dmel_mRNA_overview.sh > ../logs/dmel_mRNA_overview.log
#!/bin/bash
pyscript="/Volumes/Data/Projects/dmelR2_p-ele/scripts/mRNA-overview.py"
input_dir="/Volumes/Data/Projects/dmelR2_p-ele/rna/run2/map-GSNAP/output"
output_dir="/Volumes/Data/Projects/dmelR2_p-ele/rna/run2/mRNA/overview"
samtools view $input_dir/gt_R1G6.sort.bam | python $pyscript --sam - --sample-id R1-G6-wf > $output_dir/wf_R1G6.txt
samtools view $input_dir/gt_R2G6.sort.bam | python $pyscript --sam - --sample-id R2-G6-wf > $output_dir/wf_R2G6.txt
samtools view $input_dir/gt_R3G6.sort.bam | python $pyscript --sam - --sample-id R3-G6-wf > $output_dir/wf_R3G6.txt
samtools view $input_dir/gt_R1G15.sort.bam | python $pyscript --sam - --sample-id R1-G15-wf > $output_dir/wf_R1G15.txt
samtools view $input_dir/gt_R2G15.sort.bam | python $pyscript --sam - --sample-id R2-G15-wf > $output_dir/wf_R2G15.txt
samtools view $input_dir/gt_R3G15.sort.bam | python $pyscript --sam - --sample-id R3-G15-wf > $output_dir/wf_R3G15.txt
samtools view $input_dir/gt_R1G21.sort.bam | python $pyscript --sam - --sample-id R1-G21-wf > $output_dir/wf_R1G21.txt
samtools view $input_dir/gt_R2G21.sort.bam | python $pyscript --sam - --sample-id R2-G21-wf > $output_dir/wf_R2G21.txt
samtools view $input_dir/gt_R3G21.sort.bam | python $pyscript --sam - --sample-id R3-G21-wf > $output_dir/wf_R3G21.txt
samtools view $input_dir/gt_R1G30.sort.bam | python $pyscript --sam - --sample-id R1-G30-wf > $output_dir/wf_R1G30.txt
samtools view $input_dir/gt_R2G30.sort.bam | python $pyscript --sam - --sample-id R2-G30-wf > $output_dir/wf_R2G30.txt
samtools view $input_dir/gt_R3G30.sort.bam | python $pyscript --sam - --sample-id R3-G30-wf > $output_dir/wf_R3G30.txt
samtools view $input_dir/gt_R1G40.sort.bam | python $pyscript --sam - --sample-id R1-G40-wf > $output_dir/wf_R1G40.txt
samtools view $input_dir/gt_R2G40.sort.bam | python $pyscript --sam - --sample-id R2-G40-wf > $output_dir/wf_R2G40.txt
samtools view $input_dir/gt_R3G40.sort.bam | python $pyscript --sam - --sample-id R3-G40-wf > $output_dir/wf_R3G40.txt
# Combine outputs into single file, separating ID.
# cat *.txt|perl -pe 's/-/\t/g'
# cat *.txt|perl -pe 's/-/\t/'|perl -pe 's/-/\t/' > dmel_all.forr
Then visualised the most relevant ones in ggplot2.
library(ggplot2)
library(scales)
library(dplyr)
overview <- read.table("/Volumes/Data/Projects/dmelR2_p-ele/rna/run2/mRNA/overview_genome/dmel_all.forr",
header = FALSE, sep = "\t")
colnames(overview) <- c("Replicate", "Generation", "Tissue", "Reads", "MappedReads", "MappedReadsWithMinMQ",
"SenseGene", "AntisenseGene", "SenseTranscriptExon", "AntisenseTranscriptExon",
"SensePelement", "AntisensePelement")
generation_order <- c("G6", "G15", "G21", "G30", "G40")
overview$Generation <- factor(overview$Generation, levels = generation_order)
overview$AntisenseGene <- -overview$AntisenseGene
overview$AntisensePelement <- -overview$AntisensePelement
mapped_reads_plot <- ggplot(overview, aes(x = Generation)) +
geom_bar(aes(y = MappedReads, fill = "Mapped reads"), stat = "identity", position = "dodge") +
geom_bar(aes(y = MappedReadsWithMinMQ, fill = "Mapped reads w/ min mq"), stat = "identity", position = "dodge") +
facet_wrap(. ~ Replicate, ncol = 1) +
labs(x = "Generation", y = "Read counts") +
ggtitle("Mapped reads & mapped reads w/ min mq") +
scale_fill_manual(values = c("Mapped reads" = "darkseagreen3", "Mapped reads w/ min mq" = "khaki3"),
name = NULL,
breaks = c("Mapped reads", "Mapped reads w/ min mq"),
labels = c("Mapped reads", "Mapped reads w/ min mq"),
guide = guide_legend(reverse = TRUE)) +
theme_bw() +
theme(legend.position = "bottom") +
scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M"))
sense_antisense_pelement_plot <- ggplot(overview, aes(x = Generation, y = SensePelement, fill = "Sense")) +
geom_bar(stat = "identity") +
geom_bar(aes(y = AntisensePelement, fill = "Antisense"), stat = "identity") +
facet_wrap(. ~ Replicate, ncol = 1) +
labs(x = "Generation", y = NULL) +
ggtitle("Sense/antisense P-element read counts") +
scale_fill_manual(values = c("Sense" = "lightblue", "Antisense" = "darksalmon"),
name = NULL,
breaks = c("Sense", "Antisense"),
labels = c("Sense", "Antisense"),
guide = guide_legend(reverse = TRUE)) +
theme_bw() +
theme(legend.position = "bottom") +
scale_y_continuous(labels = label_number(scale = 1e-3, suffix = "k"))
combined_plot <- cowplot::plot_grid(mapped_reads_plot, sense_antisense_pelement_plot, ncol = 3)
ggsave("figs/mRNA_sum_stats.png", combined_plot, width = 20, height = 10, dpi = 600)
knitr::include_graphics("figs/mRNA_sum_stats.png")
We then looked at the total expression levels of selected genes across all generations for each of the three replicates.
library(ggplot2)
theme_set(theme_bw())
genes<-c("PPI251","FBtr0083183_mRNA","FBtr0088034_mRNA","FBtr0086904_mRNA","FBtr0087984_mRNA","FBtr0087189_mRNA","FBtr0080497_mRNA","FBtr0079489_mRNA","FBtr0445185_mRNA","FBtr0080316_mRNA","FBtr0075559_mRNA","FBtr0100641_mRNA","FBtr0080165_mRNA","FBtr0081502_mRNA","FBtr0073637_mRNA","FBtr0080166_mRNA","FBtr0301669_mRNA","FBtr0086897_mRNA","FBtr0085594_mRNA","FBtr0329922_mRNA","FBtr0081328_mRNA")
sc<-c("p-element","spnE","cuff","dcr-2","hen1","krimp","loqs","r2d2","vas",
"zuc","ago2-RB","armi-RB","aub","del","moon","piwi","tj-RB","rhino","rpl32","qin-RB","lok")
h<-read.table("/Volumes/Data/Projects/dmelR2_p-ele/rna/run2/mRNA/overview_genome/dmel_all.forr")
names(h)<-c("rep","time","tissue","strand","gene","pos","cov")
for(i in 1:length(genes))
{
target<-genes[i]
a<-subset(h,gene==target)
a$time <- factor(a$time, levels=c("G6", "G15", "G21", "G30","G40"))
s<-subset(a,strand=="se")
as<-subset(a,strand=="ase")
}
mRNA_plot <- ggplot() +
geom_polygon(data=s,mapping=aes(x=pos, y=cov), fill='lightblue', color='lightblue') +
geom_polygon(data=as, aes(x=pos, y=-cov), fill='darksalmon', color='darksalmon')+
facet_grid(time~rep)+
ylab("coverage")+
xlab("position")
ggsave("figs/mRNA_overview.png", mRNA_plot, width = 18, height = 6, dpi = 800)
knitr::include_graphics("figs/mRNA_overview.png")
Then we did the same again but this time looking at the expression levels of each of the selected genes individually, still across generations for each replicate.
library(ggplot2)
library(grid)
theme_set(theme_bw())
genes <- c("PPI251", "FBtr0083183_mRNA", "FBtr0088034_mRNA", "FBtr0086904_mRNA", "FBtr0087984_mRNA", "FBtr0087189_mRNA", "FBtr0080497_mRNA", "FBtr0079489_mRNA", "FBtr0445185_mRNA", "FBtr0080316_mRNA", "FBtr0075559_mRNA", "FBtr0100641_mRNA", "FBtr0080165_mRNA", "FBtr0081502_mRNA", "FBtr0073637_mRNA", "FBtr0080166_mRNA", "FBtr0301669_mRNA", "FBtr0086897_mRNA", "FBtr0085594_mRNA", "FBtr0329922_mRNA", "FBtr0081328_mRNA")
sc <- c("p-element", "spnE", "cuff", "dcr-2", "hen1", "krimp", "loqs", "r2d2", "vas", "zuc", "ago2-RB", "armi-RB", "aub", "del", "moon", "piwi", "tj-RB", "rhino", "rpl32", "qin-RB", "lok")
h <- read.table("/Volumes/Data/Projects/dmelR2_p-ele/rna/run2/splicing-expression/raw_expression/expr-spli.forr")
names(h) <- c("rep", "time", "tissue", "strand", "gene", "pos", "cov")
num_genes <- length(genes)
num_cols <- 5
num_rows <- ceiling(num_genes / num_cols)
plot_new <- function() {
grid.newpage()
pushViewport(viewport(layout = grid.layout(num_rows, num_cols)))
}
# Counter for genes
gene_counter <- 1
plot_new() # Initial plot
for (i in 1:num_rows) {
for (j in 1:num_cols) {
# Check for more genes
if (gene_counter <= num_genes) {
target <- genes[gene_counter]
short <- sc[gene_counter]
a <- subset(h, gene == target)
a$time <- factor(a$time, levels = c("G6", "G15", "G21", "G30", "G40"))
s <- subset(a, strand == "se")
as <- subset(a, strand == "ase")
pushViewport(viewport(layout.pos.row = i, layout.pos.col = j))
plot <- ggplot() +
geom_polygon(data = s, mapping = aes(x = pos, y = cov), fill = 'lightblue', color = 'lightblue') +
geom_polygon(data = as, aes(x = pos, y = -cov), fill = 'darksalmon', color = 'darksalmon') +
facet_grid(time ~ rep) +
ylab("coverage") +
xlab("position") +
ggtitle(paste(target, "-", short))
print(plot)
gene_counter <- gene_counter + 1
}
}
}
vasa, tj, armi
library(ggplot2)
# Read the output file from featureCounts
counts_file <- "/Volumes/Data/Projects/dmelR2_p-ele/rna/run2/map-star/fc/fc_R1G6.txt"
counts_data <- read.table(counts_file, header = TRUE, stringsAsFactors = FALSE)
# Select the genes of interest
selected_genes <- c("FBgn0283442", "FBgn0000964", "FBgn0041164")
# Subset the data for selected genes
selected_data <- counts_data[counts_data$Geneid %in% selected_genes, ]
# Plot expression levels
ggplot(selected_data, aes(x = Geneid, y = "../Aligned.out.sam")) +
geom_bar(stat = "identity", fill = "steelblue") +
xlab("Gene") +
ylab("Expression Level") +
ggtitle("Expression Levels of Selected Genes") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))